Purpose: Explore effects of monthly/annual water availability on Big-little difference using non-parametric Kolmogorov-Smirnov tests
18.1 Site info and daily data
Code
# site information and locationssiteinfo <-read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")siteinfo_sp <-st_as_sf(siteinfo, coords =c("long", "lat"), crs =4326)mapview(siteinfo_sp, zcol ="designation")
Code
# flow/yield (and temp) data dat <-read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_DailyWeekly.csv") %>%filter(!site_name %in%c("WoundedBuckCreek", "Brackett Creek"))# add water/climate year variablesdat <-add_date_variables(dat, dates = date, water_year_start =4)
18.2 Separate and join G-g
Code
# pull out big G datadat_big <- dat %>%filter(site_name %in%c("West Brook NWIS", "Paine Run 10", "Piney River 10", "Staunton River 10", "Spread Creek Dam", "BigCreekLower", "Shields River ab Smith NWIS", "Donner Blitzen River nr Frenchglen NWIS")) # pull out little G data, assign big G site namedat_little <- dat %>%filter(designation %in%c("little", "medium"), !subbasin %in%c("Coal Creek", "McGee Creek", "Duck Creek", "Flathead"),!site_name %in%c("West Brook NWIS", "West Brook 0", "Paine Run 10", "Piney River 10", "Staunton River 10", "Spread Creek Dam", "Shields River ab Smith NWIS", "BigCreekLower")) %>%mutate(site_name_big =ifelse(subbasin =="Big Creek", "BigCreekLower",ifelse(subbasin =="West Brook", "West Brook NWIS", ifelse(subbasin =="Paine Run", "Paine Run 10", ifelse(subbasin =="Piney River", "Piney River 10",ifelse(subbasin =="Staunton River", "Staunton River 10",ifelse(subbasin =="Shields River", "Shields River ab Smith NWIS",ifelse(subbasin =="Snake River", "Spread Creek Dam", "Donner Blitzen River nr Frenchglen NWIS"))))))))# Join big-little discharge datadat_Gg <- dat_little %>%select(site_name, site_name_big, basin, subbasin, date, Month, MonthName, WaterYear, Yield_filled_mm_7) %>%rename(yield_little = Yield_filled_mm_7) %>%left_join(dat_big %>%select(site_name, date, Yield_filled_mm_7) %>%rename(site_name_big = site_name, yield_big = Yield_filled_mm_7)) %>%drop_na() %>%mutate(MonthName =as.character(MonthName))# tabledat_Gg %>%group_by(subbasin) %>%summarize(site_name_big =unique(site_name_big)) %>%kable(caption ="Big G gage names for each focal sub-basin.")
Big G gage names for each focal sub-basin.
subbasin
site_name_big
Big Creek
BigCreekLower
Donner Blitzen
Donner Blitzen River nr Frenchglen NWIS
Paine Run
Paine Run 10
Shields River
Shields River ab Smith NWIS
Snake River
Spread Creek Dam
Staunton River
Staunton River 10
West Brook
West Brook NWIS
View unique little g site names
Code
sort(unique(dat_Gg$site_name))
[1] "Avery Brook" "Avery Brook NWIS"
[3] "Big Creek NWIS" "BigCreekMiddle"
[5] "BigCreekUpper" "Buck Creek"
[7] "Crandall Creek" "Deep Creek"
[9] "Donner Blitzen ab Fish NWIS" "Donner Blitzen ab Indian NWIS"
[11] "Donner Blitzen nr Burnt Car NWIS" "Dugout Creek"
[13] "Dugout Creek NWIS" "Fish Creek NWIS"
[15] "Grizzly Creek" "Grouse Creek"
[17] "Hallowat Creek NWIS" "HallowattCreekLower"
[19] "Jimmy Brook" "LangfordCreekLower"
[21] "LangfordCreekUpper" "Leidy Creek Mouth"
[23] "Leidy Creek Mouth NWIS" "Leidy Creek Upper"
[25] "Lodgepole Creek" "Mitchell Brook"
[27] "NF Spread Creek Lower" "NF Spread Creek Upper"
[29] "NicolaCreek" "Obear Brook Lower"
[31] "Paine Run 01" "Paine Run 02"
[33] "Paine Run 06" "Paine Run 07"
[35] "Paine Run 08" "Rock Creek"
[37] "Sanderson Brook" "SF Spread Creek Lower"
[39] "SF Spread Creek Lower NWIS" "SF Spread Creek Upper"
[41] "Shields River ab Dugout" "SkookoleelCreek"
[43] "Staunton River 02" "Staunton River 03"
[45] "Staunton River 06" "Staunton River 07"
[47] "Staunton River 09" "WernerCreek"
[49] "West Brook Lower" "West Brook Reservoir"
[51] "West Brook Upper" "West Whately Brook"
18.3 Compute Gg Difference
Code
mysites <-unique(dat_Gg$site_name)kscompare_list <-list()for (i in1:length(mysites)) { d <- dat_Gg %>%filter(site_name == mysites[i]) yrs <-unique(d$WaterYear) kscompare_list_yr <-list()for(j in1:length(yrs)) { dy <- d %>%filter(WaterYear == yrs[j]) mypvals_monthly <-tibble(site_name =rep(NA, times =12), site_name_big =rep(NA, times =12), WaterYear =rep(NA, times =12),Month =rep(NA, times =12),MonthName =rep(NA, times =12),days =rep(NA, times =12),stat =rep(NA, times =12),pval =rep(NA, times =12))for(k in1:12) { dym <- dy %>%filter(Month == k)if(dim(dym)[1] <25) next mytest <-ks.test(dym$yield_little, dym$yield_big, exact =TRUE) mypvals_monthly$site_name[k] <- mysites[i] mypvals_monthly$site_name_big[k] <-unique(dym$site_name_big) mypvals_monthly$WaterYear[k] <- yrs[j] mypvals_monthly$Month[k] <- k mypvals_monthly$MonthName[k] <-unique(dym$MonthName) mypvals_monthly$days[k] <-dim(dym)[1] mypvals_monthly$stat[k] <- mytest$statistic mypvals_monthly$pval[k] <- mytest$p.value } kscompare_list_yr[[j]] <- mypvals_monthly } kscompare_list[[i]] <-do.call(rbind, kscompare_list_yr)}kscompare <-do.call(rbind, kscompare_list) %>%drop_na()
View relationship between KS test statistic and p-value
Code
plot(pval ~ stat, kscompare, xlab ="KS test statistic", ylab ="p-value")
View distribution of KS test statistics and p-values
Code
hist(kscompare$stat)
Code
hist(kscompare$pval)
18.4 Calculate total yield
Code
# Get total annual and monthly Big G yield and join to KS diffs# get total annual yield by big G sitetotyield_annual <- dat_big %>%group_by(site_name, WaterYear, basin, subbasin) %>%summarize(days =n(), yield_annual =sum(Yield_filled_mm, na.rm =TRUE)) %>%filter(!is.na(yield_annual), days >=360) %>%ungroup() %>%rename(site_name_big = site_name) %>%select(site_name_big, WaterYear, yield_annual) %>%ungroup()# get total monthly yield by big G sitetotyield_monthly <- dat_big %>%group_by(site_name, WaterYear, basin, subbasin, Month) %>%summarize(days =n(), yield_monthly =sum(Yield_filled_mm, na.rm =TRUE)) %>%filter(!is.na(yield_monthly), days >=28) %>%ungroup() %>%rename(site_name_big = site_name) %>%select(site_name_big, WaterYear, Month, yield_monthly) %>%ungroup()
18.5 Explore Gg diff by water availability
Hypothesis: during wetter periods (months/years) flow regimes become more similar among locations within stream networks (i.e., positive relationship between monthly/annual total yield and KS-test p-value)
How might this vary among basins that differ in primary water source (rain vs. snow) and among sites within basins (due to surface vs. subsurface controls on flow)?
# G-g Difference (KS)Purpose: Explore effects of monthly/annual water availability on Big-little difference using non-parametric Kolmogorov-Smirnov tests```{r, include=FALSE}library(tidyverse)library(sf)library(mapview)library(fasstr)library(ggpubr)library(dygraphs)library(knitr)```## Site info and daily data```{r}# site information and locationssiteinfo <-read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")siteinfo_sp <-st_as_sf(siteinfo, coords =c("long", "lat"), crs =4326)mapview(siteinfo_sp, zcol ="designation")# flow/yield (and temp) data dat <-read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_DailyWeekly.csv") %>%filter(!site_name %in%c("WoundedBuckCreek", "Brackett Creek"))# add water/climate year variablesdat <-add_date_variables(dat, dates = date, water_year_start =4)```## Separate and join G-g```{r}# pull out big G datadat_big <- dat %>%filter(site_name %in%c("West Brook NWIS", "Paine Run 10", "Piney River 10", "Staunton River 10", "Spread Creek Dam", "BigCreekLower", "Shields River ab Smith NWIS", "Donner Blitzen River nr Frenchglen NWIS")) # pull out little G data, assign big G site namedat_little <- dat %>%filter(designation %in%c("little", "medium"), !subbasin %in%c("Coal Creek", "McGee Creek", "Duck Creek", "Flathead"),!site_name %in%c("West Brook NWIS", "West Brook 0", "Paine Run 10", "Piney River 10", "Staunton River 10", "Spread Creek Dam", "Shields River ab Smith NWIS", "BigCreekLower")) %>%mutate(site_name_big =ifelse(subbasin =="Big Creek", "BigCreekLower",ifelse(subbasin =="West Brook", "West Brook NWIS", ifelse(subbasin =="Paine Run", "Paine Run 10", ifelse(subbasin =="Piney River", "Piney River 10",ifelse(subbasin =="Staunton River", "Staunton River 10",ifelse(subbasin =="Shields River", "Shields River ab Smith NWIS",ifelse(subbasin =="Snake River", "Spread Creek Dam", "Donner Blitzen River nr Frenchglen NWIS"))))))))# Join big-little discharge datadat_Gg <- dat_little %>%select(site_name, site_name_big, basin, subbasin, date, Month, MonthName, WaterYear, Yield_filled_mm_7) %>%rename(yield_little = Yield_filled_mm_7) %>%left_join(dat_big %>%select(site_name, date, Yield_filled_mm_7) %>%rename(site_name_big = site_name, yield_big = Yield_filled_mm_7)) %>%drop_na() %>%mutate(MonthName =as.character(MonthName))# tabledat_Gg %>%group_by(subbasin) %>%summarize(site_name_big =unique(site_name_big)) %>%kable(caption ="Big G gage names for each focal sub-basin.")```View unique little g site names```{r}sort(unique(dat_Gg$site_name))```## Compute Gg Difference```{r}mysites <-unique(dat_Gg$site_name)kscompare_list <-list()for (i in1:length(mysites)) { d <- dat_Gg %>%filter(site_name == mysites[i]) yrs <-unique(d$WaterYear) kscompare_list_yr <-list()for(j in1:length(yrs)) { dy <- d %>%filter(WaterYear == yrs[j]) mypvals_monthly <-tibble(site_name =rep(NA, times =12), site_name_big =rep(NA, times =12), WaterYear =rep(NA, times =12),Month =rep(NA, times =12),MonthName =rep(NA, times =12),days =rep(NA, times =12),stat =rep(NA, times =12),pval =rep(NA, times =12))for(k in1:12) { dym <- dy %>%filter(Month == k)if(dim(dym)[1] <25) next mytest <-ks.test(dym$yield_little, dym$yield_big, exact =TRUE) mypvals_monthly$site_name[k] <- mysites[i] mypvals_monthly$site_name_big[k] <-unique(dym$site_name_big) mypvals_monthly$WaterYear[k] <- yrs[j] mypvals_monthly$Month[k] <- k mypvals_monthly$MonthName[k] <-unique(dym$MonthName) mypvals_monthly$days[k] <-dim(dym)[1] mypvals_monthly$stat[k] <- mytest$statistic mypvals_monthly$pval[k] <- mytest$p.value } kscompare_list_yr[[j]] <- mypvals_monthly } kscompare_list[[i]] <-do.call(rbind, kscompare_list_yr)}kscompare <-do.call(rbind, kscompare_list) %>%drop_na()```View relationship between KS test statistic and p-value```{r}plot(pval ~ stat, kscompare, xlab ="KS test statistic", ylab ="p-value")```View distribution of KS test statistics and p-values```{r}hist(kscompare$stat)hist(kscompare$pval)```## Calculate total yield```{r}# Get total annual and monthly Big G yield and join to KS diffs# get total annual yield by big G sitetotyield_annual <- dat_big %>%group_by(site_name, WaterYear, basin, subbasin) %>%summarize(days =n(), yield_annual =sum(Yield_filled_mm, na.rm =TRUE)) %>%filter(!is.na(yield_annual), days >=360) %>%ungroup() %>%rename(site_name_big = site_name) %>%select(site_name_big, WaterYear, yield_annual) %>%ungroup()# get total monthly yield by big G sitetotyield_monthly <- dat_big %>%group_by(site_name, WaterYear, basin, subbasin, Month) %>%summarize(days =n(), yield_monthly =sum(Yield_filled_mm, na.rm =TRUE)) %>%filter(!is.na(yield_monthly), days >=28) %>%ungroup() %>%rename(site_name_big = site_name) %>%select(site_name_big, WaterYear, Month, yield_monthly) %>%ungroup()```## Explore Gg diff by water availabilityHypothesis: during wetter periods (months/years) flow regimes become more similar among locations within stream networks (i.e., positive relationship between monthly/annual total yield and KS-test p-value)How might this vary among basins that differ in primary water source (rain vs. snow) and among sites within basins (due to surface vs. subsurface controls on flow)?```{r}# join KS comparison df with annual/monthly water availabilitykscompare <- kscompare %>%left_join(totyield_annual) %>%left_join(totyield_monthly) %>%mutate(MonthName =factor(MonthName, levels =c("Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar"))) %>%left_join(siteinfo %>%select(site_name, basin))```### Global relationshipPlot relationship between log(monthly total yield) and log(KS p-value) for all basins, sites, and months combined```{r}kscompare %>%ggplot(aes(x =log(yield_monthly), y =log(pval))) +geom_point(aes(group = MonthName, color = MonthName)) +geom_smooth(method ="lm") +geom_abline(slope =0, intercept =log(0.05), linetype ="dashed")```### Basin-level effects```{r fig.width=8, fig.height=7}# Plot all sites, facet by basinkscompare %>% ggplot(aes(x = log(yield_monthly), y = log(pval))) + geom_point(aes(group = MonthName, color = MonthName)) + facet_wrap(~ basin) + geom_smooth(method = "lm") + geom_abline(slope = 0, intercept = log(0.05), linetype = "dashed")```### Basin and site effects```{r}mybasins <-unique(kscompare$basin)myplots <-list()for (i in1:length(mybasins)) { myplots[[i]] <- kscompare %>%filter(basin == mybasins[i]) %>%ggplot(aes(x =log(yield_monthly), y =log(pval))) +geom_point(aes(group = MonthName, color = MonthName)) +facet_wrap(~ site_name) +geom_smooth(method ="lm") +geom_abline(slope =0, intercept =log(0.05), linetype ="dashed")}```::: panel-tabset#### Big Creek```{r, echo=FALSE, fig.height=8, fig.width=8}myplots[[1]]```#### West Brook```{r, echo=FALSE, fig.height=8, fig.width=8}myplots[[2]]```#### Paine Run```{r, echo=FALSE, fig.height=8, fig.width=8}myplots[[3]]```#### Staunton River```{r, echo=FALSE, fig.height=8, fig.width=8}myplots[[4]]```#### Shields River```{r, echo=FALSE, fig.height=8, fig.width=8}myplots[[5]]```#### Snake River```{r, echo=FALSE, fig.height=8, fig.width=8}myplots[[6]]```#### Donner Blitzen```{r, echo=FALSE, fig.height=8, fig.width=8}myplots[[7]]```:::